In [1]:
from typing import Iterable

import numpy as np
import tensorflow as tf

Speed comparison: Python list vs NumPy operations

Let's implement a standard deviation function and compare the computation time for 10 million numbers.


In [2]:
# Number of samples used by the timing benchmarks below (10 million).
n = 10 ** 7

In [3]:
# Implementation using python list
def std(x: Iterable[float]) -> float:
    """Return the population standard deviation of ``x`` using pure Python.

    Parameters
    ----------
    x : iterable of numbers
        Any iterable (list, range, generator, ...). One-shot iterators are
        copied into a list first, because the data is traversed twice.

    Returns
    -------
    float
        Square root of the mean squared deviation from the mean (divides by
        N, not N - 1).

    Raises
    ------
    ZeroDivisionError
        If ``x`` is empty.
    """
    # Sized containers (list, range, tuple) are used as-is; anything without
    # a length is materialised once so it can be measured and re-iterated.
    values = x if hasattr(x, "__len__") else list(x)
    count = len(values)
    x_mean = sum(values) / count
    # Generator expression: no temporary list of squared deviations.
    y = sum((v - x_mean) ** 2 for v in values) / count
    return y ** 0.5

In [4]:
# Time the pure-Python implementation on the integers 0 .. n-1.
%time std(range(n))


CPU times: user 1.32 s, sys: 131 ms, total: 1.45 s
Wall time: 1.45 s
Out[4]:
2886751.3459482347

In [5]:
# Implementation using numpy array function
def std_np(x):
    """Return the population standard deviation of ``x`` using NumPy.

    Parameters
    ----------
    x : array_like
        Input values. Lists and arrays of any shape are accepted; the
        statistic is taken over all elements, as ``np.std`` does by default.

    Returns
    -------
    numpy.float64
        Square root of the mean squared deviation from the mean (divides by
        N, not N - 1).
    """
    values = np.asarray(x)
    # Divide by the total element count: len() would only report the first
    # axis and give a wrong mean for multi-dimensional input.
    x_mean = np.sum(values) / values.size
    return (((values - x_mean) ** 2).mean()) ** 0.5

In [6]:
# Time the vectorised implementation on the same values, held in a NumPy array.
%time std_np(np.arange(n))


CPU times: user 162 ms, sys: 48.3 ms, total: 211 ms
Wall time: 106 ms
Out[6]:
2886751.3459480824

As we can see, the NumPy function is much faster than the one implemented on a Python list. There is also a built-in function in NumPy to compute the standard deviation. Verify that the standard deviation computed with all three techniques gives the same result.


In [7]:
%time np.std(np.arange(int(1e7)))


CPU times: user 333 ms, sys: 16.2 ms, total: 349 ms
Wall time: 59 ms
Out[7]:
2886751.3459480824

In [8]:
%%time 
# TensorFlow version of the same computation, written against the graph-mode
# API (placeholder + Session, i.e. the TensorFlow 1.x style): the graph is
# described first and only evaluated inside sess.run.
n_input = tf.placeholder(dtype=tf.float64)  # fed at run time with the sample count
x = tf.range(0, n_input)  # values 0 .. n_input - 1
x_mean = tf.reduce_mean(x)
# sqrt(mean((x - mean)^2)), i.e. the population standard deviation.
x_std = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(x, x_mean))))
with tf.Session() as sess:
    # %%time wraps the whole cell, so the reported time also includes graph
    # construction and session start-up, not just the computation.
    print(sess.run([x_std], feed_dict={n_input: n}))


[2886751.345948096]
CPU times: user 583 ms, sys: 19.1 ms, total: 602 ms
Wall time: 85.7 ms

Matrix multiplication

Use case: solving the normal equations

http://mlwiki.org/index.php/Normal_Equation


In [9]:
np.random.seed(1)  # fixed seed so the design matrix is reproducible
# Ground-truth weights. Transposing a 1-D array is a no-op, so it is omitted.
W = np.array([2.3, -5.7, 8.9])
b = 1.2  # NOTE(review): defined but not added to y below
X = np.random.random(size=(10, 3))
# Noise-free targets generated from the known weights.
y = X.dot(W)
print("W: ", W)


W:  [ 2.3 -5.7  8.9]

In [10]:
# Targets produced by the known weights (one value per row of X).
print("y: ", y)


y:  [-3.14568107  0.68066983  1.98993286  4.94832331 -4.29127963  4.13577737
  6.32033813  6.60215509 -2.32666945  6.93726089]

In [11]:
# Least-squares weights from the normal equations (X^T X) w = X^T y.
# Solving the linear system avoids forming an explicit inverse, which is
# slower and less accurate when X^T X is ill-conditioned.
W_estimate = np.linalg.solve(X.T.dot(X), X.T.dot(y))
W_estimate


Out[11]:
array([ 2.3, -5.7,  8.9])

Covariance Matrix


In [12]:
# Reproducible 5 x 3 sample matrix; the cells below treat rows as
# observations and columns as variables.
np.random.seed(1230)
X = np.random.random_sample(size=(5, 3))  # the function `ranf` is an alias of
X


Out[12]:
array([[0.27702631, 0.36855193, 0.64431478],
       [0.78019793, 0.50860458, 0.52375554],
       [0.84079088, 0.36703687, 0.67039217],
       [0.83824478, 0.68695113, 0.10454645],
       [0.43739591, 0.312447  , 0.25789323]])

In [13]:
# Population covariance computed by hand: centre each column, then
# X0^T X0 / N. The row count gets its own name so that the global `n` used by
# the timing cells is not overwritten.
n_samples = X.shape[0]
X0 = X - np.mean(X, axis=0)
X0.T.dot(X0) / n_samples


Out[13]:
array([[ 0.05438659,  0.01918617, -0.00915189],
       [ 0.01918617,  0.01840192, -0.01705645],
       [-0.00915189, -0.01705645,  0.04950637]])

In [14]:
# Same matrix from NumPy: rowvar=False treats columns as variables and
# ddof=0 divides by N, matching the manual computation.
np.cov(X, ddof=0, rowvar=False)


Out[14]:
array([[ 0.05438659,  0.01918617, -0.00915189],
       [ 0.01918617,  0.01840192, -0.01705645],
       [-0.00915189, -0.01705645,  0.04950637]])

In [15]:
# Variance of the first column; compare with entry [0, 0] of the covariance matrix.
np.var(X[:, 0])


Out[15]:
0.05438658851819618

In [16]:
# Covariance of the first two columns only; compare with the upper-left
# 2 x 2 block of the full covariance matrix.
np.cov(X[:,0], X[:,1], ddof=0)


Out[16]:
array([[0.05438659, 0.01918617],
       [0.01918617, 0.01840192]])

Eigenvalue Decomposition

Further reading on eigendecomposition, SVD, and PCA: https://www.cc.gatech.edu/~dellaert/pubs/svd-note.pdf


In [17]:
# Covariance of the columns of X with np.cov's default normalisation (N - 1),
# hence the values differ from the ddof=0 results computed earlier.
cx = np.cov(X, rowvar=False)
cx


Out[17]:
array([[ 0.06798324,  0.02398272, -0.01143987],
       [ 0.02398272,  0.0230024 , -0.02132056],
       [-0.01143987, -0.02132056,  0.06188297]])

In [18]:
# cx is symmetric, so use the symmetric solver: eigh guarantees real
# eigenvalues, whereas eig treats cx as a general matrix and returns the
# eigenvalues in no particular order.
e, v = np.linalg.eigh(cx)
# eigh returns ascending eigenvalues; reorder to descending so that the
# columns of v follow the same order as the singular values from an SVD.
order = np.argsort(e)[::-1]
e, v = e[order], v[:, order]
e, v


Out[18]:
(array([0.09171233, 0.05316521, 0.00799107]),
 array([[-0.7030196 , -0.64191217, -0.3061245 ],
        [-0.42282087,  0.03115894,  0.90567744],
        [ 0.57182686, -0.76614482,  0.29331921]]))

In [19]:
# Centre each column of X by subtracting its column mean.
Z = X - X.mean(axis=0)
Z


Out[19]:
array([[-0.35770485, -0.08016637,  0.20413434],
       [ 0.14546677,  0.05988628,  0.0835751 ],
       [ 0.20605972, -0.08168143,  0.23021173],
       [ 0.20351361,  0.23823282, -0.33563398],
       [-0.19733525, -0.1362713 , -0.1822872 ]])

In [20]:
# Column means of the centred data; expected to be zero up to floating-point round-off.
Z.mean(axis=0)


Out[20]:
array([ 2.22044605e-17, -4.44089210e-17, -2.22044605e-17])

In [21]:
# Full SVD of the centred data. NOTE: np.linalg.svd returns the right singular
# vectors already transposed, so `V` here holds V^T, and `D` is the 1-D array
# of singular values rather than a diagonal matrix.
U, D, V = np.linalg.svd(Z)

In [22]:
# Left singular vectors (5 x 5 for this 5 x 3 input).
U


Out[22]:
array([[-0.66387938,  0.15335678, -0.54128459,  0.37391911,  0.32084477],
       [ 0.13174705, -0.33728933, -0.19140748, -0.53739058,  0.73719276],
       [-0.03518989, -0.67481681,  0.38890969,  0.58453227,  0.22462241],
       [ 0.71940261,  0.29042389, -0.30770774,  0.47836284,  0.2731278 ],
       [-0.15208039,  0.56832548,  0.65149012,  0.02978059,  0.47807055]])

In [23]:
# Singular values, largest first.
D


Out[23]:
array([0.60568087, 0.46115164, 0.17878555])

In [24]:
# Right singular vectors, one per row (this is V^T); each row matches a
# column of the eigenvector matrix of cx up to sign.
V


Out[24]:
array([[ 0.7030196 ,  0.42282087, -0.57182686],
       [-0.64191217,  0.03115894, -0.76614482],
       [ 0.3061245 , -0.90567744, -0.29331921]])

U and V are unitary (orthogonal) matrices: multiplying each by its transpose gives the identity matrix.


In [25]:
# U times its transpose; expected to be the identity up to round-off.
U.dot(U.T)


Out[25]:
array([[ 1.00000000e+00, -1.09255124e-16,  1.25679661e-17,
         1.98321425e-16, -1.01653038e-16],
       [-1.09255124e-16,  1.00000000e+00, -2.54001297e-16,
         9.28867142e-17,  1.24526333e-16],
       [ 1.25679661e-17, -2.54001297e-16,  1.00000000e+00,
         1.18816684e-16,  3.68935290e-16],
       [ 1.98321425e-16,  9.28867142e-17,  1.18816684e-16,
         1.00000000e+00, -1.65612590e-16],
       [-1.01653038e-16,  1.24526333e-16,  3.68935290e-16,
        -1.65612590e-16,  1.00000000e+00]])

In [26]:
# V times its transpose; expected to be the identity up to round-off.
V.dot(V.T)


Out[26]:
array([[ 1.00000000e+00, -1.17976663e-16,  4.11484180e-16],
       [-1.17976663e-16,  1.00000000e+00,  1.18705807e-16],
       [ 4.11484180e-16,  1.18705807e-16,  1.00000000e+00]])

The row vectors of U and of V are mutually orthogonal: the dot product of any two distinct rows is zero (up to round-off).


In [27]:
# Pairwise dot products of the first three rows of U; each should be ~0.
U[0].dot(U[1]), U[0].dot(U[2]), U[1].dot(U[2])


Out[27]:
(-1.249000902703301e-16, 2.7755575615628914e-17, -2.7755575615628914e-16)

In [28]:
# Pairwise dot products of the three rows of V; each should be ~0.
V[0].dot(V[1]), V[0].dot(V[2]), V[1].dot(V[2])


Out[28]:
(-1.1102230246251565e-16, 4.163336342344337e-16, 1.1102230246251565e-16)

In [29]:
# Zero matrix with the shape and dtype of X; it will hold the singular values
# on its main diagonal so the SVD factors can be multiplied back together.
X_0 = np.zeros_like(X)

In [30]:
# Write the singular values onto the main diagonal (fill_diagonal modifies X_0 in place).
np.fill_diagonal(X_0, D)
X_0


Out[30]:
array([[0.60568087, 0.        , 0.        ],
       [0.        , 0.46115164, 0.        ],
       [0.        , 0.        , 0.17878555],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ]])

In [31]:
# Multiply the factors back together: U * Sigma * V^T (`V` already holds V^T).
# The product should reproduce the centred matrix Z.
U.dot(X_0).dot(V)


Out[31]:
array([[-0.35770485, -0.08016637,  0.20413434],
       [ 0.14546677,  0.05988628,  0.0835751 ],
       [ 0.20605972, -0.08168143,  0.23021173],
       [ 0.20351361,  0.23823282, -0.33563398],
       [-0.19733525, -0.1362713 , -0.1822872 ]])

In [ ]: